-- 清理脚本：删除旧的 text-embedding 代理记录及孤立数据 (PostgreSQL)
-- 使用前请先备份：pg_dump -Fc yourdb > backup.dump

BEGIN;

-- 1. 统计即将删除的 text-embedding 记录数量
SELECT COUNT(*) AS to_delete_count FROM "KnowledgeBase" WHERE "EmbeddingType" = 'text-embedding';

-- 2. 删除旧 text-embedding 记录（统一模型后不再需要）
DELETE FROM "KnowledgeBase" WHERE "EmbeddingType" = 'text-embedding';

-- 3. 删除没有任何知识库引用的孤立文档块
DELETE FROM "DocumentChunks" dc
WHERE NOT EXISTS (
    SELECT 1 FROM "KnowledgeBase" kb WHERE kb."ChunkId" = dc."Id"
);

-- 4. 删除没有文档块的孤立文档
DELETE FROM "Documents" d
WHERE NOT EXISTS (
    SELECT 1 FROM "DocumentChunks" c WHERE c."DocumentId" = d."Id"
);

-- 5. 可选：重新统计 analyze
ANALYZE;

COMMIT;

-- 可选：执行物理空间回收（需要 superuser 或足够权限，并会锁表）
-- VACUUM (FULL, ANALYZE) "KnowledgeBase";
-- VACUUM (FULL, ANALYZE) "DocumentChunks";
-- VACUUM (FULL, ANALYZE) "Documents";

-- 验证剩余记录
SELECT 'remaining_knowledge' AS metric, COUNT(*) FROM "KnowledgeBase"
UNION ALL
SELECT 'remaining_chunks', COUNT(*) FROM "DocumentChunks"
UNION ALL
SELECT 'remaining_documents', COUNT(*) FROM "Documents";
